{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.7.2"
    },
    "colab": {
      "name": "XLNet.ipynb",
      "provenance": [],
      "include_colab_link": true
    },
    "accelerator": "GPU",
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "10dc608a56814702beeff0fe79758796": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_5cb36923988b4c77ab152db3b4fa8def",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_8b27357b78614bda98ffc426ba75649f",
              "IPY_MODEL_6a7fa572de4e4d4d8f3fc75b00e2be8f",
              "IPY_MODEL_f9b3733973f3441e9af29bb01e46ca23"
            ]
          }
        },
        "5cb36923988b4c77ab152db3b4fa8def": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "8b27357b78614bda98ffc426ba75649f": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_6b183edc923b409eba88a34f043f0c7f",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": "Downloading: 100%",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_89def102c8be4e5db460c42d4a20ad49"
          }
        },
        "6a7fa572de4e4d4d8f3fc75b00e2be8f": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_90be322e061c4b54ac0438c9cde676c7",
            "_dom_classes": [],
            "description": "",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 238192,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 238192,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_84c6c8c8911b4f8a9257ef99371a10f6"
          }
        },
        "f9b3733973f3441e9af29bb01e46ca23": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_3f1d7ac441214b46981568b3594fff6b",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 238k/238k [00:00&lt;00:00, 723kB/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_449b19b495fa4da9aaabee7fb6a7d749"
          }
        },
        "6b183edc923b409eba88a34f043f0c7f": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "89def102c8be4e5db460c42d4a20ad49": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "90be322e061c4b54ac0438c9cde676c7": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "84c6c8c8911b4f8a9257ef99371a10f6": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "3f1d7ac441214b46981568b3594fff6b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "449b19b495fa4da9aaabee7fb6a7d749": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "45f34217f1bf49198c73a07c93338cca": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_7684f733665f452eaa68ecaa39e65f7e",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_452a35e4405e4ae69bd8d3f91f00dd84",
              "IPY_MODEL_3aa2de0540c840faaded56ff44493f6b",
              "IPY_MODEL_b85566cb9b654a50b95d901aaa66d3e9"
            ]
          }
        },
        "7684f733665f452eaa68ecaa39e65f7e": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "452a35e4405e4ae69bd8d3f91f00dd84": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_7c8cda1d38a64cd2a22ea20039a569f5",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": "Downloading: 100%",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_96099a1dd715494390286c783d6b0cdb"
          }
        },
        "3aa2de0540c840faaded56ff44493f6b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_bf20d8562cf7496892403d2ddbc2f718",
            "_dom_classes": [],
            "description": "",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 1351,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 1351,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_0ea039e008484557aecc4b37b0974dd1"
          }
        },
        "b85566cb9b654a50b95d901aaa66d3e9": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_b643517cfd694af09de023ef729b9b01",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 1.35k/1.35k [00:00&lt;00:00, 47.2kB/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_22abbc7b49ec4273b0911703fd2a47b5"
          }
        },
        "7c8cda1d38a64cd2a22ea20039a569f5": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "96099a1dd715494390286c783d6b0cdb": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "bf20d8562cf7496892403d2ddbc2f718": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "0ea039e008484557aecc4b37b0974dd1": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "b643517cfd694af09de023ef729b9b01": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "22abbc7b49ec4273b0911703fd2a47b5": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "8bf1f05dfd094f9197780d5bf2de200d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_4ce05cea664e42d69a20cb5ff1c18a8e",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_17cb780cd2174db4ba008b33fe706d3f",
              "IPY_MODEL_a3534d98ee334dec8a30787e70f6811c",
              "IPY_MODEL_b90f4f8e8e8f4291a78a1603b5557c5a"
            ]
          }
        },
        "4ce05cea664e42d69a20cb5ff1c18a8e": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "17cb780cd2174db4ba008b33fe706d3f": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_df2aa792a1a54a5dbd5072987cc77b73",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": "Downloading: 100%",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_8212309fc2344d3394702659f613d68e"
          }
        },
        "a3534d98ee334dec8a30787e70f6811c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_47e33ea88cef4c489ddc3d970fb1e1b8",
            "_dom_classes": [],
            "description": "",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 1637757076,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 1637757076,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_44fa6b0807844d499d5dc6b5f20fee43"
          }
        },
        "b90f4f8e8e8f4291a78a1603b5557c5a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_3b529a07836246f09c02e12e240d8a0d",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 1.64G/1.64G [00:38&lt;00:00, 45.6MB/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_16c19f5e051641afa56fc45fdfd965fc"
          }
        },
        "df2aa792a1a54a5dbd5072987cc77b73": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "8212309fc2344d3394702659f613d68e": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "47e33ea88cef4c489ddc3d970fb1e1b8": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "44fa6b0807844d499d5dc6b5f20fee43": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "3b529a07836246f09c02e12e240d8a0d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "16c19f5e051641afa56fc45fdfd965fc": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        }
      }
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/agemagician/ProtTrans/blob/master/Embedding/PyTorch/Basic/ProtXLNet.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "UjIDWEXCJ0b6"
      },
      "source": [
        "<h3> Extracting protein sequences' features using ProtXLNet pretrained-model <h3>"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "BD2-XqmqJ0b7"
      },
      "source": [
        "<b>1. Load necessry libraries including huggingface transformers<b>"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "a3bTean9J-7k",
        "outputId": "7f839b39-bc68-4b52-f6e5-9c3fdc5612d0",
        "colab": {
          "base_uri": "https://localhost:8080/"
        }
      },
      "source": [
        "!pip install -q transformers sentencepiece"
      ],
      "execution_count": 1,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "\u001b[K     |████████████████████████████████| 2.6 MB 17.9 MB/s \n",
            "\u001b[K     |████████████████████████████████| 1.2 MB 56.8 MB/s \n",
            "\u001b[K     |████████████████████████████████| 895 kB 60.5 MB/s \n",
            "\u001b[K     |████████████████████████████████| 636 kB 67.8 MB/s \n",
            "\u001b[K     |████████████████████████████████| 3.3 MB 54.7 MB/s \n",
            "\u001b[?25h"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "aIy-Dk-OJ0b8"
      },
      "source": [
        "import torch\n",
        "from transformers import XLNetTokenizer, AutoModel, pipeline\n",
        "import re\n",
        "import numpy as np\n",
        "import os\n",
        "import requests\n",
        "from tqdm.auto import tqdm"
      ],
      "execution_count": 2,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "B7Op-POnJ0cI"
      },
      "source": [
        "<b>2. Load the vocabulary and ProtXLNet Model<b>"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "NiWe3_MEJ0cJ",
        "outputId": "744ba8fd-357d-4555-ad4c-d74f66770668",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 81,
          "referenced_widgets": [
            "10dc608a56814702beeff0fe79758796",
            "5cb36923988b4c77ab152db3b4fa8def",
            "8b27357b78614bda98ffc426ba75649f",
            "6a7fa572de4e4d4d8f3fc75b00e2be8f",
            "f9b3733973f3441e9af29bb01e46ca23",
            "6b183edc923b409eba88a34f043f0c7f",
            "89def102c8be4e5db460c42d4a20ad49",
            "90be322e061c4b54ac0438c9cde676c7",
            "84c6c8c8911b4f8a9257ef99371a10f6",
            "3f1d7ac441214b46981568b3594fff6b",
            "449b19b495fa4da9aaabee7fb6a7d749",
            "45f34217f1bf49198c73a07c93338cca",
            "7684f733665f452eaa68ecaa39e65f7e",
            "452a35e4405e4ae69bd8d3f91f00dd84",
            "3aa2de0540c840faaded56ff44493f6b",
            "b85566cb9b654a50b95d901aaa66d3e9",
            "7c8cda1d38a64cd2a22ea20039a569f5",
            "96099a1dd715494390286c783d6b0cdb",
            "bf20d8562cf7496892403d2ddbc2f718",
            "0ea039e008484557aecc4b37b0974dd1",
            "b643517cfd694af09de023ef729b9b01",
            "22abbc7b49ec4273b0911703fd2a47b5"
          ]
        }
      },
      "source": [
        "tokenizer = XLNetTokenizer.from_pretrained(\"Rostlab/prot_xlnet\", do_lower_case=False)"
      ],
      "execution_count": 3,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "10dc608a56814702beeff0fe79758796",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "Downloading:   0%|          | 0.00/238k [00:00<?, ?B/s]"
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "45f34217f1bf49198c73a07c93338cca",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "Downloading:   0%|          | 0.00/1.35k [00:00<?, ?B/s]"
            ]
          },
          "metadata": {}
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "TIAg52ixJ0cN",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 100,
          "referenced_widgets": [
            "8bf1f05dfd094f9197780d5bf2de200d",
            "4ce05cea664e42d69a20cb5ff1c18a8e",
            "17cb780cd2174db4ba008b33fe706d3f",
            "a3534d98ee334dec8a30787e70f6811c",
            "b90f4f8e8e8f4291a78a1603b5557c5a",
            "df2aa792a1a54a5dbd5072987cc77b73",
            "8212309fc2344d3394702659f613d68e",
            "47e33ea88cef4c489ddc3d970fb1e1b8",
            "44fa6b0807844d499d5dc6b5f20fee43",
            "3b529a07836246f09c02e12e240d8a0d",
            "16c19f5e051641afa56fc45fdfd965fc"
          ]
        },
        "outputId": "906bd3a5-c1d3-4c74-c963-471a5447c14e"
      },
      "source": [
        "model = AutoModel.from_pretrained(\"Rostlab/prot_xlnet\")"
      ],
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "8bf1f05dfd094f9197780d5bf2de200d",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "Downloading:   0%|          | 0.00/1.64G [00:00<?, ?B/s]"
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "text": [
            "Some weights of the model checkpoint at Rostlab/prot_xlnet were not used when initializing XLNetModel: ['lm_loss.bias', 'lm_loss.weight']\n",
            "- This IS expected if you are initializing XLNetModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
            "- This IS NOT expected if you are initializing XLNetModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
          ],
          "name": "stderr"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "3jkCctT1J0cS"
      },
      "source": [
        "<b>3. Load the model into the GPU if avilabile<b>"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "cifZjuMbJ0cS"
      },
      "source": [
        "fe = pipeline('feature-extraction', model=model, tokenizer=tokenizer,device=0)"
      ],
      "execution_count": 5,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "kCAdlY5YJ0cW"
      },
      "source": [
        "<b>4. Create or load sequences and map rarely occured amino acids (U,Z,O,B) to (unk)<b>"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "5nYqFMDRJ0cX"
      },
      "source": [
        "sequences_Example = [\"A E T C Z A O\",\"S K T Z P\"]"
      ],
      "execution_count": 6,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "5fSzAeM9J0cc"
      },
      "source": [
        "sequences_Example = [re.sub(r\"[UZOBX]\", \"<unk>\", sequence) for sequence in sequences_Example]"
      ],
      "execution_count": 7,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "zc2cEpE7J0cg"
      },
      "source": [
        "<b>5. Extracting sequences' features and covert the output to numpy if needed<b>"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "rNowPJYgJ0ch"
      },
      "source": [
        "embedding = fe(sequences_Example)"
      ],
      "execution_count": 8,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "v7QLt3qWJ0cl"
      },
      "source": [
        "embedding = np.array(embedding)"
      ],
      "execution_count": 9,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "uocPT992J0cy",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "6656a4dc-d186-4bd7-b9b0-458ee5a42277"
      },
      "source": [
        "print(embedding)"
      ],
      "execution_count": 10,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "[[[ 5.69580197e-01 -8.12229991e-01  1.51267636e+00 ... -3.47371846e-01\n",
            "   -1.97737467e+00  1.02282596e+00]\n",
            "  [ 2.76625063e-02 -6.71196759e-01  9.98871863e-01 ...  7.27692693e-02\n",
            "   -1.62625885e+00 -8.44621472e-03]\n",
            "  [ 2.20986292e-01 -5.26815653e-01  6.64871275e-01 ...  4.78142016e-02\n",
            "   -1.39787090e+00  3.08236480e-01]\n",
            "  ...\n",
            "  [-3.64926666e-01 -8.19322467e-01  4.81531680e-01 ...  2.35715330e-01\n",
            "   -6.73881948e-01 -1.06030154e+00]\n",
            "  [ 4.51355964e-01 -8.96943450e-01  4.00961459e-01 ... -1.93732351e-01\n",
            "   -5.60827017e-01 -2.78551280e-01]\n",
            "  [ 3.18279088e-01 -1.61193037e+00  4.94405985e-01 ... -2.51359075e-01\n",
            "   -1.32739067e-01 -1.23081710e-02]]\n",
            "\n",
            " [[ 1.91230476e-01  1.84453130e-02 -1.82836526e-03 ... -4.36504066e-01\n",
            "    2.18435768e-02 -1.59097388e-01]\n",
            "  [ 2.63837665e-01 -6.02969602e-02 -1.12777390e-02 ... -2.28306949e-01\n",
            "   -3.21158886e-01  1.10596269e-01]\n",
            "  [ 8.65128040e-01 -1.61870003e-01 -1.75776944e-01 ...  3.56552243e-01\n",
            "   -2.34118134e-01  4.93945815e-02]\n",
            "  ...\n",
            "  [ 4.84942645e-01  6.79004192e-01  3.08672220e-01 ...  7.47225210e-02\n",
            "   -3.86942685e-01 -3.16800475e-01]\n",
            "  [ 8.82356644e-01  1.08732736e+00  1.10506028e-01 ...  2.86995769e-01\n",
            "   -1.13194609e+00  7.79403299e-02]\n",
            "  [ 5.38626790e-01 -1.53350951e-02 -6.67607844e-01 ...  6.61961555e-01\n",
            "    1.94277409e-02 -7.00099409e-01]]]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "8wGuhtK3J0c1"
      },
      "source": [
        "<b>Optional: Remove padding ([PAD]) and special tokens ([CLS],[SEP]) that is added by ProtXLNet model<b>"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "5ZSpwbj5J0c1"
      },
      "source": [
        "features = [] \n",
        "for seq_num in range(len(embedding)):\n",
        "    seq_len = len(sequences_Example[seq_num].replace(\" \", \"\"))\n",
        "    padded_seq_len = len(embedding[seq_num])\n",
        "    start_Idx = padded_seq_len-seq_len-2\n",
        "    end_Idx = padded_seq_len-2\n",
        "    seq_emd = embedding[seq_num][start_Idx:end_Idx]\n",
        "    features.append(seq_emd)"
      ],
      "execution_count": 11,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "hTJ1lTjQJ0c6",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "723a4805-74a4-46ea-e160-dd4319bf916d"
      },
      "source": [
        "print(features)"
      ],
      "execution_count": 12,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "[array([[ 0.02766251, -0.67119676,  0.99887186, ...,  0.07276927,\n",
            "        -1.62625885, -0.00844621],\n",
            "       [ 0.22098629, -0.52681565,  0.66487128, ...,  0.0478142 ,\n",
            "        -1.3978709 ,  0.30823648],\n",
            "       [ 0.98757803, -1.0321219 ,  0.99680513, ..., -0.338559  ,\n",
            "        -1.51521862,  1.05237138],\n",
            "       [ 0.70799828, -0.664361  ,  0.85833752, ..., -0.02473303,\n",
            "        -1.51670814, -0.21759628],\n",
            "       [-0.14213684, -0.86483902,  0.8144275 , ..., -0.32999068,\n",
            "        -0.23385352, -1.71954966],\n",
            "       [-0.36492667, -0.81932247,  0.48153168, ...,  0.23571533,\n",
            "        -0.67388195, -1.06030154]]), array([], shape=(0, 1024), dtype=float64)]\n"
          ],
          "name": "stdout"
        }
      ]
    }
  ]
}